R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(ggplot2)
library(RColorBrewer)
load("/Users/w435u/Documents/ST_SC/DATA_STSC_CAO/Seurat_C2.RData")

Including Plots

SpatialFeaturePlot(st_dataset, "pred_st")+
  scale_fill_gradientn(colours = rev(brewer.pal(n = 11, name = "RdBu"))[1:11])
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

You can also embed plots, for example:

df <- st_dataset@meta.data

# Create plot
library(ggpubr)
p1 <- ggplot(df, aes(x = cnv_score, y = pred_st)) +
  geom_point(alpha=0.5) +  # Scatter plot
  geom_smooth(method = "lm", se = TRUE, color = "#ae0000") +  # Fitted regression line
  stat_cor(method = "pearson", digits = 2) +  # Correlation and p-value
  theme_minimal()+
  scale_y_continuous(limits = c(0, 1))+ylab("Malignancy") + xlab("CNV score")

p2 <- ggplot(df, aes(x = tumor_sig1, y = pred_st)) +
  geom_point(alpha=0.5) +  # Scatter plot
  geom_smooth(method = "lm", se = TRUE, color = "#ae0000") +  # Fitted regression line
  stat_cor(method = "pearson") +  # Correlation and p-value
  theme_minimal()+
  scale_y_continuous(limits = c(0, 1))+ylab("Malignancy")+ xlab("Tumor signature")

p3 <- ggplot(df, aes(x = pEMT1, y = pred_st)) +
  geom_point(alpha=0.5) +  # Scatter plot
  geom_smooth(method = "lm", se = TRUE, color = "#ae0000") +  # Fitted regression line
  stat_cor(method = "pearson") +  # Correlation and p-value
  theme_minimal()+
  scale_y_continuous(limits = c(0, 1))+ylab("Malignancy")+ xlab("pEMT score")

p <- p1+p2+p3
pdf(file=paste0("~/Documents/GitHub/SCTP_iMETA/Fig2/corr_", "Q3", ".pdf"), width=12, height=6)
print(p)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 364 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 364 rows containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 364 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2
p
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 364 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 364 rows containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 364 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Dot plot of different

pred_st <- st_dataset@meta.data$pred_st
pred_cat <- as.character(cut(pred_st, 
                             breaks = c(-Inf, quantile(pred_st, c(0.25, 0.5, 0.75, 1))), 
                             labels = c("C1",  "C2", "C3", "C4")))
st_dataset <- AddMetaData(st_dataset, metadata = pred_cat, col.name = "predcat")

#tumor_enriched <- FindMarkers(st_dataset, ident.1 = "C4",ident.2 = "C1", group.by = 'predcat', verbose = FALSE)

#write.csv(tumor_enriched, file="~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_P3.csv")
tumor_enriched <- read.csv( file="~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_P3.csv", row.names = 1) 

genes <- rownames(tumor_enriched)[1:40]

p <- DotPlot(object = st_dataset, features =genes, group.by = "predcat")
## Warning: Scaling data with a low number of groups may produce misleading
## results
df <- p$data
#df$features.plot <- rownames(df)
exp_mat<-df %>% 
  dplyr::select(-pct.exp, -avg.exp) %>%  
  tidyr::pivot_wider(names_from = id, values_from = avg.exp.scaled) %>% 
  as.data.frame() 
exp_mat <- exp_mat[!is.na(exp_mat$features.plot), ]
row.names(exp_mat) <- exp_mat$features.plot  
exp_mat <- exp_mat[,-1] %>% data.matrix()

percent_mat<-df %>% 
  dplyr::select(-avg.exp, -avg.exp.scaled) %>%  
  tidyr::pivot_wider(names_from = id, values_from = pct.exp) %>% 
  as.data.frame() 

percent_mat <- percent_mat[!is.na(percent_mat$features.plot), ]
row.names(percent_mat) <- percent_mat$features.plot  
percent_mat <- percent_mat[,-1] %>% data.matrix()

library(viridis)
## Loading required package: viridisLite
col_fun = circlize::colorRamp2(c(-1, 0, 2), plasma(20)[c(1,10, 20)])
cell_fun = function(j, i, x, y, w, h, fill){
  grid.rect(x = x, y = y, width = w, height = h, 
            gp = gpar(col = NA, fill = NA))
  grid.circle(x=x,y=y,r= percent_mat[i, j]/100 * min(unit.c(w, h)),
              gp = gpar(fill = col_fun(exp_mat[i, j]), col = NA))}

write.csv(exp_mat, file="~/Documents/GitHub/SCTP_iMETA/Fig2/Expression_FIG4H.csv")

library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.20.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
dot <- Heatmap(exp_mat,
               heatmap_legend_param=list(title="expression"),
               column_title = "clustered dotplot", 
               col=col_fun,
               rect_gp = gpar(type = "none"),
               cell_fun = cell_fun,
               row_names_gp = gpar(fontsize = 10),
               row_km = 2,
               border = "black",
               cluster_columns = FALSE,column_order = c("C4","C3","C2", "C1"))


dot

pdf("~/Documents/GitHub/SCTP_iMETA/Fig2/Dotplot_Q3_degs.pdf")
print(dot)
dev.off()
## quartz_off_screen 
##                 2

Differential expression between different annotated groups

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
load("/Users/w435u/Documents/ST_SC/OUTPUT_CAO_new/Seurat_C4_subtumor.RData")
SpatialDimPlot(st_dataset, "T12")

#tumor_enriched1 <- FindMarkers(st_dataset, ident.1 = "T2",ident.2 = "T1", group.by = 'T12', verbose = FALSE)
#tumor_enriched2 <- FindMarkers(st_dataset, ident.1 = "T1",ident.2 = "Other", group.by = 'T12', verbose = FALSE)
#tumor_enriched3 <- FindMarkers(st_dataset, ident.1 = "T2",ident.2 = "Other", group.by = 'T12', verbose = FALSE)

#write.csv(tumor_enriched1, file="~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_Q4_1.csv")
#write.csv(tumor_enriched2, file="~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_Q4_2.csv")
#write.csv(tumor_enriched3, file="~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_Q4_3.csv")

tumor_enriched1 <- read.csv("~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_Q4_1.csv", row.names = 1)
tumor_enriched2 <- read.csv("~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_Q4_2.csv", row.names = 1)
tumor_enriched3 <- read.csv("~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_Q4_3.csv", row.names = 1)

subgenes1 <- tumor_enriched1%>% filter(p_val<10e-14)%>% filter(abs(avg_log2FC)>1.5)
subgenes1$Group <- "T2 v.s T1"
subgenes2 <- tumor_enriched2%>% filter(p_val<10e-14)%>% filter(abs(avg_log2FC)>1.5)
subgenes2$Group <- "T1 v.s Other"
subgenes3 <- tumor_enriched3%>% filter(p_val<10e-14)%>% filter(abs(avg_log2FC)>1.5)
subgenes3$Group <- "T2 v.s Other"

suball <- rbind(subgenes1, subgenes2, subgenes3)
suball$gene <- rownames(suball)
suball$label <- ifelse(suball$avg_log2FC < (-0.5), '#0068B2',
                       ifelse(suball$avg_log2FC > 0.5, '#B2182B',
                              'black'))
dbar <- suball %>% group_by(Group) %>% summarise_all(list(min=min, max=max))%>%
  dplyr::select(Group, avg_log2FC_min, avg_log2FC_max)

my_col3 = c( "black","#e60012", "#00a0e9")

library(ggrepel)
g <- ggplot()+
  geom_col(data = dbar,  # 绘制负向背景柱状图
           mapping = aes(x = Group,y = avg_log2FC_min),
           fill = "#dcdcdc",alpha = 0.6, width = 0.7) +
  geom_col(data = dbar, # 绘制正向背景柱状图
           mapping = aes(x = Group,y = avg_log2FC_max),
           fill = "#dcdcdc",alpha = 0.6, width = 0.7) +
  geom_jitter(data = suball, # 绘制所有数据点
              aes(x = Group, y = avg_log2FC, color = label),
              size = 0.85,
              width =0.3) +
  geom_tile(data = suball, # 绘制中心分组标记图
            aes(x = Group,
                y = 0,
                fill = Group),
            height=0.5,
            color = "black",
            alpha = 0.4,
            show.legend = F) +
  ggsci::scale_fill_npg(alpha=0.5) + # 自定义颜色
  scale_color_manual(values=rev(my_col3))+# 自定义颜色
  geom_text_repel(data = suball,  # 这里的filter很关键,筛选你想要标记的基因
                  aes(x = Group, y = avg_log2FC, label = gene),
                  size = 2, 
                  max.overlaps = getOption("ggrepel.max.overlaps", default = 15),
                  color = 'black',
                  force = 1.2,
                  arrow = arrow(length = unit(0.008, "npc"),
                                type = "open", ends = "last"))+
  labs(x="Cluster", y="Average logFC") +
  geom_text(data=suball, # 绘制中心分组标记图文本注释
            aes(x=Group, 
                y=0, 
                label=Group),
            size = 3,
            color ="white") +
  theme_minimal() + 
  theme(axis.title = element_text(size = 13,color = "black",face = "bold"),
        axis.line.y = element_line(color = "black",size = 1.2),
        axis.line.x = element_blank(),
        axis.text.x = element_blank(),
        panel.grid = element_blank(),
        legend.position = "top",
        legend.direction = "vertical",
        legend.justification = c(1,0),
        legend.text = element_text(size = 13))
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
g 
## Warning: ggrepel: 783 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

pdf("~/Documents/GitHub/SCTP_iMETA/Fig2/Dotplot_Q3_degs.pdf")
print(g)
## Warning: ggrepel: 775 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
dev.off()
## quartz_off_screen 
##                 2

Gene expression of top genes

load("/Users/w435u/Documents/ST_SC/OUTPUT_CAO_new/Seurat_C4_subtumor.RData")


tumor_enriched1 <- read.csv("~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_Q4_1.csv", row.names = 1)
tumor_enriched2 <- read.csv("~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_Q4_2.csv", row.names = 1)
tumor_enriched3 <- read.csv("~/Documents/GitHub/SCTP_iMETA/Fig2/DEGs_Q4_3.csv", row.names = 1)


genes <- unique(c(rownames(tumor_enriched1)[1:25],rownames(tumor_enriched2)[1:25]))

p <- DotPlot(object = st_dataset, features =genes, group.by = "T12")
## Warning: Scaling data with a low number of groups may produce misleading
## results
df <- p$data
exp_mat<-df %>%
  dplyr::select(-pct.exp, -avg.exp) %>%
  tidyr::pivot_wider(names_from = id, values_from = avg.exp.scaled) %>%
  as.data.frame()

exp_mat <- exp_mat[!is.na(exp_mat$features.plot), ]

row.names(exp_mat) <- exp_mat$features.plot
exp_mat <- exp_mat[,-1] %>% data.matrix()

percent_mat<-df %>%
  dplyr::select(-avg.exp, -avg.exp.scaled) %>%
  tidyr::pivot_wider(names_from = id, values_from = pct.exp) %>%
  as.data.frame()

percent_mat <- percent_mat[!is.na(percent_mat$features.plot), ]


row.names(percent_mat) <- percent_mat$features.plot
percent_mat <- percent_mat[,-1] %>% data.matrix()

 library(viridis)
 col_fun = circlize::colorRamp2(c(-1, 0, 2), plasma(20)[c(1,10, 20)])
 cell_fun = function(j, i, x, y, w, h, fill){
   grid.rect(x = x, y = y, width = w, height = h, 
             gp = gpar(col = NA, fill = NA))
   grid.circle(x=x,y=y,r= percent_mat[i, j]/100 * min(unit.c(w, h)),
               gp = gpar(fill = col_fun(exp_mat[i, j]), col = NA))}
# 
# write.csv(exp_mat, file="~/Documents/GitHub/SCTP_iMETA/Fig2/Expression_FIG5F.csv")
exp_mat <- read.csv("~/Documents/GitHub/SCTP_iMETA/Fig2/Expression_FIG5F.csv", row.names = 1)

dot <- Heatmap(exp_mat,
               heatmap_legend_param=list(title="expression"),
               column_title = "clustered dotplot", 
               col=col_fun,
               rect_gp = gpar(type = "none"),
               cell_fun = cell_fun,
               row_names_gp = gpar(fontsize = 10),
               row_km = 3,
               border = "black",
               cluster_columns = FALSE,column_order = c("T2","T1","Other"))
## Warning: The input is a data frame-like object, convert it to a matrix.
dot

pdf("~/Documents/GitHub/SCTP_iMETA/Fig2/Dotplot_Q4_degs.pdf")
dot
dev.off()
## quartz_off_screen 
##                 2